import requests
from fake_useragent import UserAgent
from lxml import etree
from urllib.request import *
from time import sleep
from random import randint

# Shared HTTP session: reused across requests for connection pooling and
# cookie persistence. NOTE(review): the original get_html bypassed this and
# called requests.get directly — confirm which behavior is intended.
session = requests.Session()


def get_html(url):
    """Fetch *url* and return its body as UTF-8 text, or None on failure.

    Sleeps a random 3-10 seconds before each request to throttle the
    crawler, sends a random desktop-Chrome User-Agent, and routes the
    request through a hard-coded HTTP proxy.

    Returns None when the request fails (network/proxy error, timeout)
    or the server responds with a non-200 status.
    """
    headers = {
        "User-Agent": UserAgent().chrome
    }
    proxies = {
        "http": "122.4.45.140:9999",
    }
    # Random delay so the request pattern looks less like an automated crawler.
    sleep(randint(3, 10))
    try:
        # Reuse the module-level session (connection pooling, cookies).
        # timeout prevents the crawler from hanging forever on a dead proxy.
        response = session.get(url, headers=headers, proxies=proxies, timeout=10)
    except requests.RequestException:
        # Network errors (dead proxy, DNS failure, timeout) previously
        # crashed the whole script; treat them like a failed fetch instead.
        return None
    response.encoding = "utf-8"
    if response.status_code == 200:
        return response.text
    return None


def parse_index(html):
    """Extract movie detail-page URLs from a listing page.

    Returns a list of absolute maoyan.com URLs, one per movie card
    found on the page.
    """
    tree = etree.HTML(html)
    hrefs = tree.xpath("//div[@class='movie-item-hover']/a/@href")
    detail_urls = []
    for href in hrefs:
        detail_urls.append("https://maoyan.com" + href)
    return detail_urls


def format_actors(actors):
    """Return the unique, whitespace-stripped actor names as a set."""
    return {name.strip() for name in actors}


def parse_movie(html):
    """Parse a movie detail page into a dict.

    Returns a dict with:
      - "name":   list of <h1> text nodes (the movie title),
      - "actors": set of unique, stripped actor names,
      - "types":  list of genre link texts.
    """
    tree = etree.HTML(html)
    actor_names = tree.xpath(
        "//ul[@class='celebrity-list clearfix']/li[@class='celebrity actor']/div[@class='info']/a/text()")
    return {
        "name": tree.xpath("//h1/text()"),
        "actors": format_actors(actor_names),
        "types": tree.xpath("//li[@class='ellipsis']/a/text()"),
    }


def main():
    """Crawl the maoyan film listing and print each movie's parsed details.

    Fetches the index page, extracts every detail-page URL, then fetches
    and parses each detail page in turn. Pages that fail to download are
    skipped instead of crashing the crawler (get_html returns None on
    failure, and etree.HTML(None) would raise).
    """
    index_url = "https://maoyan.com/films?showType=3"
    html = get_html(index_url)
    if html is None:
        # Without the index page there is nothing to crawl.
        print("Failed to fetch index page:", index_url)
        return
    for url in parse_index(html):
        movie_html = get_html(url)
        if movie_html is None:
            # Skip detail pages that failed to download.
            continue
        print(parse_movie(movie_html))


# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
